##  Randomization inference for EXPERIMENT 1 for sponsorship paper
rm(list = ls())

library(acs)
library(RCurl)
library(RJSONIO)


setwd("~/Dropbox/Current projects/kern-pietryka-crabtree/data/")
dat <- read.csv("sponsorship_1_v3_November+6%2C+2017_12.32.csv", header = TRUE)
dat <- dat[-c(1:2),]

dim(dat)
colnames(dat)


##  drop test responses (based on timing and location)
dat <- dat[-c(1:39),]

##  consistency check: only real responses are included
table(dat$Status)


##  drop responses that were not completed (this might be a problem if it were common but
##  it is not)
sel <- dat$Finished == "True"
table(sel)
dat <- dat[sel,]


##  drop respondents who don't give consent
sel <- dat$Q1.2 == "Yes"
table(sel) 
dat <- dat[sel,]


##  any respondents who answer multiple times?
stopifnot(length(dat$mTurkCode) == length(unique(dat$mTurkCode)))


##  drop respondents based on lat/long
latlong2fips <- function(latitude, longitude) {
  url <- "http://data.fcc.gov/api/block/find?format=json&latitude=%f&longitude=%f"
  url <- sprintf(url, latitude, longitude)
  json <- RCurl::getURL(url)
  json <- RJSONIO::fromJSON(json)
  as.character(json$County['FIPS'])
}
key <- api.key.install("f66712573be9fdfd1264a2d99bc0356a4b3edaf7")


###  this needs to be fixed to work properly; new web address

latlong2fips <- 
function(latitude, longitude) {
  url <- "https://geo.fcc.gov/api/census/block/find?latitude=%f&longitude=%f&format=json"
  url <- sprintf(url, latitude, longitude)
  json <- RCurl::getURL(url)
  json <- RJSONIO::fromJSON(json)
  as.character(json$County['FIPS'])
}



dat$state <- NA

m <- 1
for(m in 1:nrow(dat))	{

	temp <- latlong2fips(latitude = as.numeric(as.character(dat$LocationLatitude[m])),
						 longitude = as.numeric(as.character(dat$LocationLongitude[m])))

if(is.null(temp))	{	cat(m,as.numeric(as.character(dat$LocationLatitude[m])),
						as.numeric(as.character(dat$LocationLongitude[m])),"\n"); next() }
if(is.na(temp)) 	{	cat(m,as.numeric(as.character(dat$LocationLatitude[m])),
						as.numeric(as.character(dat$LocationLongitude[m])),"\n"); next() }
if(temp == "NULL")	{	cat(m,as.numeric(as.character(dat$LocationLatitude[m])),
						as.numeric(as.character(dat$LocationLongitude[m])),"\n"); next() }

	sel <- which(fips.state[1:51,1] == as.numeric(substr(temp,1,2)))
	temp <- fips.state[sel,2]

	if(length(temp) == 0)	{	cat(m,as.numeric(as.character(dat$LocationLatitude[m])),
								as.numeric(as.character(dat$LocationLongitude[m])),"\n");
								next()
							}

	dat$state[m] <- temp

	if(m %% 100 == 0)	{ cat(m, "\n") }

						}

##  drop respondents not from the 50 U.S. states
sel <- is.na(dat$state)
table(sel)
dat <- dat[!sel,]
dim(dat)

table(dat$state)



##  Deal with identical lat/long coordinates
##  Drop respondent if previous respondent with identical lat/long exists in data
dat$latlong <- paste(dat$LocationLatitude, dat$LocationLongitude, sep = "/")

dat$end.date <- strptime(dat[,2], format = "%Y-%m-%d %H:%M:%S")
dat <- dat[order(dat$end.date),]

##  consistency check
stopifnot(all(dat$end.date == sort(dat$end.date)))

dat$repeated <- NA
dat$repeated[1] <- FALSE

m <- 2
for(m in 2:nrow(dat))	{

	if(!any(dat$latlong[m] == dat$latlong[1:(m-1)])) { dat$repeated[m] <- FALSE } else
	{ dat$repeated[m] <- TRUE }

						}

table(dat$repeated)
dat <- dat[!dat$repeated,]



dat$question1 <- NA 	# abortion
dat$question2 <- NA 	# birth control
dat$question3 <- NA 	# religious attendance
dat$question4 <- NA 	# prayer

dat$Tr <- NA

m <- 1
for(m in 1:nrow(dat))	{

	temp <- 			c(	as.character(dat$Q2.1[m]),
							as.character(dat$Q5.3[m]),
							as.character(dat$Q7.3[m]))

	dat$Tr[m] <- which(temp != "")	##  order: No info, OSU, ND
	dat$question1[m] <- temp[temp != ""]

	temp <- 			c(	as.character(dat$Q2.2[m]),
							as.character(dat$Q5.4[m]),
							as.character(dat$Q7.4[m]))
	dat$question2[m] <- temp[temp != ""]

	temp <- 			c(	as.character(dat$Q3.1[m]),
							as.character(dat$Q4.3[m]),
							as.character(dat$Q6.3[m]))
	dat$question3[m] <- temp[temp != ""]

	temp <- 			c(	as.character(dat$Q3.2[m]),
							as.character(dat$Q4.4[m]),
							as.character(dat$Q6.4[m]))
	dat$question4[m] <- temp[temp != ""]

						}

table(dat$Tr)

##  test whether assignment is as good as random
chisq.test(table(dat$Tr), simulate.p.value = TRUE, B = 10000)


table(dat$question1)
table(dat$question2)
table(dat$question3)
table(dat$question4)

# save.image("Experiment 1 intermediate results.RData")
rm(sel,m,latlong2fips,key,temp)



##
##  Randomization inference
##
s <- 1000000

##  compute sample test statistic for all four outcomes and all three-way comparisons
##  we don't care about warning messages since we only use test statistics, not Chi-square
##  approximation
t.stat.1.1v2 <- chisq.test(table(dat$question1[dat$Tr != 3], dat$Tr[dat$Tr != 3]))$stat
t.stat.1.1v3 <- chisq.test(table(dat$question1[dat$Tr != 2], dat$Tr[dat$Tr != 2]))$stat
t.stat.1.2v3 <- chisq.test(table(dat$question1[dat$Tr != 1], dat$Tr[dat$Tr != 1]))$stat

t.stat.2.1v2 <- chisq.test(table(dat$question2[dat$Tr != 3], dat$Tr[dat$Tr != 3]))$stat
t.stat.2.1v3 <- chisq.test(table(dat$question2[dat$Tr != 2], dat$Tr[dat$Tr != 2]))$stat
t.stat.2.2v3 <- chisq.test(table(dat$question2[dat$Tr != 1], dat$Tr[dat$Tr != 1]))$stat

t.stat.3.1v2 <- chisq.test(table(dat$question3[dat$Tr != 3], dat$Tr[dat$Tr != 3]))$stat
t.stat.3.1v3 <- chisq.test(table(dat$question3[dat$Tr != 2], dat$Tr[dat$Tr != 2]))$stat
t.stat.3.2v3 <- chisq.test(table(dat$question3[dat$Tr != 1], dat$Tr[dat$Tr != 1]))$stat

t.stat.4.1v2 <- chisq.test(table(dat$question4[dat$Tr != 3], dat$Tr[dat$Tr != 3]))$stat
t.stat.4.1v3 <- chisq.test(table(dat$question4[dat$Tr != 2], dat$Tr[dat$Tr != 2]))$stat
t.stat.4.2v3 <- chisq.test(table(dat$question4[dat$Tr != 1], dat$Tr[dat$Tr != 1]))$stat


t.stat.rand <- matrix(NA,s,12)


set.seed(1)
for(m in 1:s)	{

	dat$Tr.r <- sample(c(1,2,3), nrow(dat), replace = TRUE)

	t.stat.rand[m,1] <-  chisq.test(table(dat$question1[dat$Tr.r != 3], dat$Tr.r[dat$Tr.r != 3]))$stat
	t.stat.rand[m,2] <-  chisq.test(table(dat$question1[dat$Tr.r != 2], dat$Tr.r[dat$Tr.r != 2]))$stat
	t.stat.rand[m,3] <-  chisq.test(table(dat$question1[dat$Tr.r != 1], dat$Tr.r[dat$Tr.r != 1]))$stat

	t.stat.rand[m,4] <-  chisq.test(table(dat$question2[dat$Tr.r != 3], dat$Tr.r[dat$Tr.r != 3]))$stat
	t.stat.rand[m,5] <-  chisq.test(table(dat$question2[dat$Tr.r != 2], dat$Tr.r[dat$Tr.r != 2]))$stat
	t.stat.rand[m,6] <-  chisq.test(table(dat$question2[dat$Tr.r != 1], dat$Tr.r[dat$Tr.r != 1]))$stat

	t.stat.rand[m,7] <-  chisq.test(table(dat$question3[dat$Tr.r != 3], dat$Tr.r[dat$Tr.r != 3]))$stat
	t.stat.rand[m,8] <-  chisq.test(table(dat$question3[dat$Tr.r != 2], dat$Tr.r[dat$Tr.r != 2]))$stat
	t.stat.rand[m,9] <-  chisq.test(table(dat$question3[dat$Tr.r != 1], dat$Tr.r[dat$Tr.r != 1]))$stat

	t.stat.rand[m,10] <- chisq.test(table(dat$question4[dat$Tr.r != 3], dat$Tr.r[dat$Tr.r != 3]))$stat
	t.stat.rand[m,11] <- chisq.test(table(dat$question4[dat$Tr.r != 2], dat$Tr.r[dat$Tr.r != 2]))$stat
	t.stat.rand[m,12] <- chisq.test(table(dat$question4[dat$Tr.r != 1], dat$Tr.r[dat$Tr.r != 1]))$stat


	if(m %% 1000 == 0)	{ cat(m, "\n") }

				}

# save.image("Experiment 1 results.RData")
# load("Experiment 1 results.RData")



p <- matrix(NA,4,4)
colnames(p) <- c("No logo vs. OSU","No logo vs. ND","OSU vs. ND","Omnibus")
rownames(p) <- c("Abortion","Birth control","Religious attendance","Prayer")

##  p-values for each outcome and each two-way comparison
p[1,1:3] <- 

c(
mean(t.stat.1.1v2 < t.stat.rand[,1]),
mean(t.stat.1.1v3 < t.stat.rand[,2]),
mean(t.stat.1.2v3 < t.stat.rand[,3]))


p[2,1:3] <- 

c(
mean(t.stat.2.1v2 < t.stat.rand[,4]),
mean(t.stat.2.1v3 < t.stat.rand[,5]),
mean(t.stat.2.2v3 < t.stat.rand[,6]))


p[3,1:3] <- 

c(
mean(t.stat.3.1v2 < t.stat.rand[,7]),
mean(t.stat.3.1v3 < t.stat.rand[,8]),
mean(t.stat.3.2v3 < t.stat.rand[,9]))


p[4,1:3] <- 

c(
mean(t.stat.4.1v2 < t.stat.rand[,10]),
mean(t.stat.4.1v3 < t.stat.rand[,11]),
mean(t.stat.4.2v3 < t.stat.rand[,12]))



##  Using Young's approach to combine test statistics
Omega.1 <- solve(var(t.stat.rand[,1:3]))
Omega.2 <- solve(var(t.stat.rand[,4:6]))
Omega.3 <- solve(var(t.stat.rand[,7:9]))
Omega.4 <- solve(var(t.stat.rand[,10:12]))

t.stat.1 <-	t(c(t.stat.1.1v2,t.stat.1.1v3,t.stat.1.2v3)^2) %*%
			Omega.1 %*%
		  t(t(c(t.stat.1.1v2,t.stat.1.1v3,t.stat.1.2v3)^2))

t.stat.2 <-	t(c(t.stat.2.1v2,t.stat.2.1v3,t.stat.2.2v3)^2) %*%
			Omega.2 %*%
		  t(t(c(t.stat.2.1v2,t.stat.2.1v3,t.stat.2.2v3)^2))

t.stat.3 <-	t(c(t.stat.3.1v2,t.stat.3.1v3,t.stat.3.2v3)^2) %*%
			Omega.3 %*%
		  t(t(c(t.stat.3.1v2,t.stat.3.1v3,t.stat.3.2v3)^2))

t.stat.4 <-	t(c(t.stat.4.1v2,t.stat.4.1v3,t.stat.4.2v3)^2) %*%
			Omega.4 %*%
		  t(t(c(t.stat.4.1v2,t.stat.4.1v3,t.stat.4.2v3)^2))


out.1 <- rep(NA,nrow(t.stat.rand))
out.2 <- rep(NA,nrow(t.stat.rand))
out.3 <- rep(NA,nrow(t.stat.rand))
out.4 <- rep(NA,nrow(t.stat.rand))

m <- 1
for(m in 1:nrow(t.stat.rand))	{

	out.1[m] <- t.stat.rand[m,1:3]^2   %*% Omega.1 %*% t(t(t.stat.rand[m,1:3]^2))
	out.2[m] <- t.stat.rand[m,4:6]^2   %*% Omega.2 %*% t(t(t.stat.rand[m,4:6]^2))
	out.3[m] <- t.stat.rand[m,7:9]^2   %*% Omega.3 %*% t(t(t.stat.rand[m,7:9]^2))
	out.4[m] <- t.stat.rand[m,10:12]^2 %*% Omega.4 %*% t(t(t.stat.rand[m,10:12]^2))

								}

p[1,4] <- mean(as.vector(t.stat.1) < out.1)
p[2,4] <- mean(as.vector(t.stat.2) < out.2)
p[3,4] <- mean(as.vector(t.stat.3) < out.3)
p[4,4] <- mean(as.vector(t.stat.4) < out.4)

p2 <- round(p,2)

library(Hmisc)
latex(p2, file = "")



##  using Young's approach for global p-value
Omega <- solve(var(t.stat.rand))

t.stat <-	t(c(t.stat.1.1v2,t.stat.1.1v3,t.stat.1.2v3,
				t.stat.2.1v2,t.stat.2.1v3,t.stat.2.2v3,
				t.stat.3.1v2,t.stat.3.1v3,t.stat.3.2v3,
				t.stat.4.1v2,t.stat.4.1v3,t.stat.4.2v3)^2) %*%
				Omega %*%
		  t(t(c(t.stat.1.1v2,t.stat.1.1v3,t.stat.1.2v3,
				t.stat.2.1v2,t.stat.2.1v3,t.stat.2.2v3,
				t.stat.3.1v2,t.stat.3.1v3,t.stat.3.2v3,
				t.stat.4.1v2,t.stat.4.1v3,t.stat.4.2v3)^2))

out <- rep(NA,nrow(t.stat.rand))

m <- 1
for(m in 1:nrow(t.stat.rand))	{

	out[m] <- t.stat.rand[m,]^2 %*% Omega %*% t(t(t.stat.rand[m,]^2))

								}

round(mean(as.vector(t.stat) < out),2)



##  create outcomes table
temp <- table(dat$question1, dat$Tr)
latex(temp, file = "")

temp <- table(dat$question2, dat$Tr)
latex(temp, file = "")

temp <- table(dat$question3, dat$Tr)
latex(temp, file = "")

temp <- table(dat$question4, dat$Tr)
latex(temp, file = "")







.
